---
title: "Equivalence Testing for Regression Discontinuity Designs -- Simulations"
author: "Erin Hartman"
output: html_document
---

# Setup

### Load packages and print session info
```{r setup, include=TRUE, warning = FALSE}
rm(list = ls())
knitr::opts_chunk$set(echo = TRUE)
library(rdrobust)
library(rddensity)
library(rdd)

library(parallel)
library(kableExtra)
library(knitr)
library(tidyverse)

start_time <- Sys.time()

## Last run date
print(Sys.time())

print(sessionInfo())

## number of cores
## Simulations use 1 less than number of cores
print(detectCores())
```

### Load previous results
Use this to load previous simulation results, if desired.

```{r}
load_results <- NULL
# load_results <- load("")

run_sims <- !exists("load_results") || is_null(load_results)
```

### Load Functions

Load the RDD Equivalence functions from Github.  At time of publication, the repository was at commit
`f3836d2d4cf663e86a9b69752cc28b0a521f556f` which is loaded below.
```{r}
devtools::source_url("https://github.com/ekhartman/rdd_equivalence/blob/f3836d2d4cf663e86a9b69752cc28b0a521f556f/RDD_equivalence_functions.R?raw=TRUE")
```

# Simulations

### Simulation Parameters

```{r}
nsim = 1000
eps = 2.5
eps_den = 1.5
alpha = 0.05
parallel = TRUE
```

### Run simulations

```{r, eval = run_sims}
set.seed(97593845)
  
  full_sim = NULL
  for(tau_den in c(1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7)) {
    if(tau_den == 1) {
      taus_to_test = c(0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5)
    } else {
      taus_to_test = 0
    }
     for(tau in taus_to_test) {
      cat("\n\ntau_den: ", tau_den, "\n", sep = "")
      cat("tau: ", tau, "\n", sep = "")
      cat("N: ")
      for(N in c(50, 100, 1000, 5000, 10000)) {
        cat(N, ", ", sep = "")
        sim = mclapply(1:nsim, function(i) {
          
          # DGP
          x = rnorm(N, 0, 1)
          
          ## jump in density
          x[x < 0] <- x[ x < 0] * tau_den
          
          err = rnorm(N, 0, 1.295)
          tau_i = rnorm(N, abs(x / 10) + 2, 0.5)
          
          ## DGP Based on Calonico, Cattaneo, Titiunik (2014)
          x <- x / 10
          y1 = ifelse(x < 0
                     , 12.7 * x + 71.8 * x^2 + 202.1 * x^3 + 215.4 * x^4 + 73.3 * x^5
                     , 8.4 * x - 30 * x^2 + 79.9 * x^3 - 90.1 * x^4 + 35.6 * x^5) + 48 + ifelse(x < 0, 0, tau)
          
          x <- x * 10
          
          ## Get point estimate from rdrobust
          point = tryCatch(rdrobust(y= y1 + err, x = x, bwselect = "mserd"), error = function(e) NA) 
          
          if(length(point) == 1) {
            ## If no estimate, set to NA
            tost = NA
            equiv = NA
          } else {
            ## 90% CI inclusion method
            tost = tryCatch(rdd.tost(point$coef[3], point$se[3], eps = eps), error = function(e) NA)
            
            ## Equivalence t-test method
            equiv = tryCatch(rdd.equiv(point$coef[3], point$se[3], eps = eps), error = function(e) NA)  
          }
          
          ## Equivalence-based density test
          den_test = tryCatch(rddensity(X = x, vce="plugin", all = TRUE), error = function(e) NA)
          
          ## McCrary based density test
          mcrary = tryCatch(DCdensity(x, 0, plot = FALSE), error = function(e) NA)
          
          if(!is.na(den_test)) {
            equiv_den = tryCatch(rdd.tost.ratio(den_test$hat$left, den_test$hat$right, den_test$sd_asy$left, den_test$sd_asy$right, eps = eps_den), error = function(e) NA)  
          } else {
            equiv_den = NA
          }
          
          
          list( point_estimate = tryCatch(point$coef[3], error = function(e) NA)
                , point_rej_tost = tryCatch(tost$rej, error = function(e) NA)
                , point_p_tost = tryCatch(tost$p, error = function(e) NA)
                , point_inverted_tost = tryCatch(tost$inverted, error = function(e) NA)
                , point_tau = tryCatch(tau, error = function(e) NA)
                , den_tau = tryCatch(tau_den, error = function(e) NA)
                , N = tryCatch(N, error = function(e) NA)
                , point_N_h = tryCatch(sum(point$Nh), error = function(e) NA)
                , point_standard_p = tryCatch(point$pv[3], error = function(e) NA)
                , point_rej_equiv = tryCatch(equiv$rej, error = function(e) NA)
                , point_p_equiv = tryCatch(equiv$p, error = function(e) NA)
                , point_inverted_equiv = tryCatch(equiv$inverted, error = function(e) NA)
                
                , den_est = tryCatch(den_test$hat$left/den_test$hat$right, error = function(e) NA)
                , den_equiv_p = tryCatch(equiv_den$p, error = function(e) NA)
                , den_nht_p = tryCatch(den_test$test$p_asy, error = function(e) NA)
                , den_lower_ci = tryCatch(den_test$hat$diff - qnorm(0.95) * den_test$sd_asy$diff, error = function(e) NA)
                , den_upper_ci = tryCatch(den_test$hat$diff + qnorm(0.95) * den_test$sd_asy$diff, error = function(e) NA)
                , den_equiv_ci_lower = tryCatch(1/equiv_den$inverted, error = function(e) NA)
                , den_equiv_ci_upper = tryCatch(equiv_den$inverted, error = function(e) NA)
                , den_est_left = tryCatch(den_test$hat$left, error = function(e) NA)
                , den_est_right = tryCatch(den_test$hat$right, error = function(e) NA)
                , den_N_eff_left = tryCatch(den_test$N$eff_left, error = function(e) NA)
                , den_N_eff_right = tryCatch(den_test$N$eff_right, error = function(e) NA)
                , den_mcrary_p = tryCatch(mcrary, error = function(e) NA))
        }, mc.cores = ifelse(parallel, detectCores(), 1)) %>% bind_rows()
        full_sim = bind_rows(full_sim, sim)
      }
    }
  }

save(full_sim, file = paste0("./sims_", gsub("-", "", lubridate::today()), ".RData"))
```

# Plots

First, transform the data for summary statistics, including probability of rejection at the 0.05 level.
```{r}
sim_res <- full_sim %>% group_by(point_tau, den_tau, N) %>% summarize(
      rej_point_equiv = mean(point_rej_equiv, na.rm = TRUE), 
      rej_point_tost = mean(point_rej_tost, na.rm = TRUE), 
      rej_point_nht = mean(point_standard_p > 0.05, na.rm = TRUE), 
      rej_den = mean(den_equiv_p < 0.05, na.rm = TRUE),
      rej_den_mcp = mean(den_mcrary_p > 0.05, na.rm = TRUE),
      rej_den_nht = mean(den_nht_p > 0.05, na.rm = TRUE)
      )
```

### Figure 1

Simulation results for the equivalence test for continuity in a pre-treatment covariate.


```{r}
sim_res %>% gather(key = test, value = rej, -N, -point_tau, -den_tau, -contains("cov"), -contains("per")) %>% filter(den_tau == 1 & !str_detect(test, "den")) %>% mutate(test = factor(test, levels = c("rej_point_equiv", "rej_point_tost", "rej_point_nht"))) %>%
  filter(N %in% c(50, 100, 1000)) %>%
  ggplot() + geom_rect(data = data.frame(xstart = eps, xend = Inf), aes(xmin=xstart, xmax=xend, ymin=-Inf, ymax=Inf), fill = "gray", alpha = 0.15) + theme_bw() +
  geom_line(aes(x = point_tau, y = rej, linetype = as.factor(test), color = as.factor(N))) + geom_hline(yintercept = 0.05) +
  ggtitle("Equivalence Continuity Tests vs Tests of Difference") +
  annotate("text", x = 1.25, 1.2, label = "Equivalence Holds") +
  annotate("text", x = 3.25, 1.15, label = "Equivalence Does\n Not Hold") +
  ylab("Fraction Considered Equivalent") +
  xlab("True Size of Discontinuous Jump at Cutoff") +
  #xlim(c(0, 0.9)) +
  theme(legend.position = "bottom", legend.box = "horizontal", legend.direction = "vertical", legend.key.height = unit(0.24, "in"), plot.title = element_text(hjust = 0.5),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank()) +
  guides(color = guide_legend(title.position="top", title.hjust = 0.5,
                              keywidth=0.15,
                              keyheight=0.15,
                              default.unit="inch"),
         linetype = guide_legend(title.position="top", title.hjust = 0.5,
                              keywidth=0.15,
                              keyheight=0.15,
                              default.unit="inch")) +
  scale_color_grey(name = "Sample Size", start = 0.7, end = 0) +
  scale_linetype_discrete(name = "Type of Test", labels = c("Equiv. T-test", "Equiv. Interval Inclusion", "Trad. NHT")) +
  xlim(c(0, 3.75))

ggsave("./equiv_tests_continuity.pdf", width = 5, height = 4.5)
```

### Figure 2

Simulation results for the equivalence test for continuity in the density function.


```{r}
sim_res %>%
  gather(key = test, value = rej, -N, -point_tau, -den_tau, -contains("cov"), -contains("per")) %>% filter(point_tau == 0 & !str_detect(test, "point")) %>%
  filter(N %in% c(100, 5000, 10000)) %>%
  ggplot() + geom_rect(data = data.frame(xstart = eps_den, xend = Inf), aes(xmin=xstart, xmax=xend, ymin=-Inf, ymax=Inf), fill = "gray", alpha = 0.15) + theme_bw() +
  geom_line(aes(x = den_tau, y = rej, linetype = as.factor(test), color = as.factor(N))) + geom_hline(yintercept = 0.05) +
  ggtitle("Equivalence Density Tests vs Tests of Difference") +
  annotate("text", x = 1.25, 1.2, label = "Equivalence Holds") +
  annotate("text", x = 1.65, 1.15, label = "Equivalence Does\n Not Hold") +
  ylab("Fraction Considered Equivalent") +
  xlab("True Density Imbalance at Cutoff") +
  theme(legend.position = "bottom", legend.box = "horizontal", legend.direction = "vertical", legend.key.height = unit(0.24, "in"), plot.title = element_text(hjust = 0.5),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank()) +
  guides(color = guide_legend(title.position="top", title.hjust = 0.5,
                              keywidth=0.15,
                              keyheight=0.15,
                              default.unit="inch"),
         linetype = guide_legend(title.position="top", title.hjust = 0.5,
                              keywidth=0.15,
                              keyheight=0.15,
                              default.unit="inch")) +
  scale_color_grey(name = "Sample Size", start = 0.7, end = 0) +
  scale_linetype_discrete(name = "Type of Test", labels = c("Equivalence", "McCrary NHT", "CJM NHT")) +
  xlim(c(1, 1.75))

ggsave("./equiv_tests_density.pdf", width = 5, height = 4.5)
```


### Figure SI-1

This is one example draw from the DGP.

```{r}
set.seed(9738)
plot_data = NULL
for(tau in c(0, 2)) {
  for(tau_den in c(1, 1.5)) {
      N = 1000
      x = rnorm(N, 0, 20)
      
      x[x < 0] <- x[ x < 0] * tau_den
      
      err = rnorm(N, 0, 1.295)
      tau_i = rnorm(N, abs(x / 10) + 2, 0.5)
      
      x <- x / 10
          y1 = ifelse(x < 0
                     , 12.7 * x + 71.8 * x^2 + 202.1 * x^3 + 215.4 * x^4 + 73.3 * x^5
                     , 8.4 * x - 30 * x^2 + 79.9 * x^3 - 90.1 * x^4 + 35.6 * x^5) + 48 + ifelse(x < 0, 0, tau)
          
      x <- x * 10
      
      plot_data = rbind(plot_data, data.frame(x = x, y = y1 + err - 50, cef = y1 - 50, Treatment = ifelse(x < 0, "control", "treated"), tau = ifelse(tau == 0, "Continuous CEF", "Discontinuous Jump of 2"), tau_den = ifelse(tau_den == 1, "Continuous Density", "Density Jump of 1.5")))
  }
}
```

Top panel of Figure SI-1 (pre-treatment covariate).
```{r}
ggplot(plot_data, aes(x = x)) + geom_point(aes(y = y), alpha = 0.08) +
  geom_line(aes(y = cef, col = Treatment), size = 1.75) +
  theme_bw() + 
  ggtitle("Example Simulation -- n = 1000 -- CEF") + 
  theme(legend.position = "bottom") + 
  facet_grid(tau ~ tau_den) + guides(col = "none") + 
  ylab("Z") +
  scale_color_grey() +
  xlim(c(-10, 10)) +
  ylim(c(-10, 5))

ggsave("sim_cef.pdf", width = 6, height = 5)
```

Lower panel of Figure SI-1 (density function).
```{r}
ggplot(plot_data, aes(x = x)) + 
  geom_histogram(aes(y = ..count../sum(..count..) * 100, fill = Treatment), position =  "identity", binwidth = 4, center = 2) +
  theme_bw() + 
  ggtitle("Example Simulation-- n = 1000 -- Density") + 
  theme(legend.position = "bottom") + 
  facet_grid(tau ~ tau_den) + 
  ylab("density") +
  scale_fill_grey()

ggsave("sim_den.pdf", width = 6, height = 5)
```

Total run time is `r round(difftime(Sys.time(), start_time, units = "min"), 2)` minutes.